import numpy as np
import pandas as pd
import ast
import plotly.express as px
from plotly import graph_objects as go
# Load the Flipkart e-commerce sample into the working frame; the trailing
# bare expression makes the notebook render a preview of it.
DATA_PATH = "flipkart_com-ecommerce_sample.csv"
df = pd.read_csv(DATA_PATH)
df
| uniq_id | crawl_timestamp | product_url | product_name | product_category_tree | pid | retail_price | discounted_price | image | is_FK_Advantage_product | description | product_rating | overall_rating | brand | product_specifications | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | c2d766ca982eca8304150849735ffef9 | 2016-03-25 22:59:23 +0000 | http://www.flipkart.com/alisha-solid-women-s-c... | Alisha Solid Women's Cycling Shorts | ["Clothing >> Women's Clothing >> Lingerie, Sl... | SRTEH2FF9KEDEFGF | 999.0 | 379.0 | ["http://img5a.flixcart.com/image/short/u/4/a/... | False | Key Features of Alisha Solid Women's Cycling S... | No rating available | No rating available | Alisha | {"product_specification"=>[{"key"=>"Number of ... |
| 1 | 7f7036a6d550aaa89d34c77bd39a5e48 | 2016-03-25 22:59:23 +0000 | http://www.flipkart.com/fabhomedecor-fabric-do... | FabHomeDecor Fabric Double Sofa Bed | ["Furniture >> Living Room Furniture >> Sofa B... | SBEEH3QGU7MFYJFY | 32157.0 | 22646.0 | ["http://img6a.flixcart.com/image/sofa-bed/j/f... | False | FabHomeDecor Fabric Double Sofa Bed (Finish Co... | No rating available | No rating available | FabHomeDecor | {"product_specification"=>[{"key"=>"Installati... |
| 2 | f449ec65dcbc041b6ae5e6a32717d01b | 2016-03-25 22:59:23 +0000 | http://www.flipkart.com/aw-bellies/p/itmeh4grg... | AW Bellies | ["Footwear >> Women's Footwear >> Ballerinas >... | SHOEH4GRSUBJGZXE | 999.0 | 499.0 | ["http://img5a.flixcart.com/image/shoe/7/z/z/r... | False | Key Features of AW Bellies Sandals Wedges Heel... | No rating available | No rating available | AW | {"product_specification"=>[{"key"=>"Ideal For"... |
| 3 | 0973b37acd0c664e3de26e97e5571454 | 2016-03-25 22:59:23 +0000 | http://www.flipkart.com/alisha-solid-women-s-c... | Alisha Solid Women's Cycling Shorts | ["Clothing >> Women's Clothing >> Lingerie, Sl... | SRTEH2F6HUZMQ6SJ | 699.0 | 267.0 | ["http://img5a.flixcart.com/image/short/6/2/h/... | False | Key Features of Alisha Solid Women's Cycling S... | No rating available | No rating available | Alisha | {"product_specification"=>[{"key"=>"Number of ... |
| 4 | bc940ea42ee6bef5ac7cea3fb5cfbee7 | 2016-03-25 22:59:23 +0000 | http://www.flipkart.com/sicons-all-purpose-arn... | Sicons All Purpose Arnica Dog Shampoo | ["Pet Supplies >> Grooming >> Skin & Coat Care... | PSOEH3ZYDMSYARJ5 | 220.0 | 210.0 | ["http://img5a.flixcart.com/image/pet-shampoo/... | False | Specifications of Sicons All Purpose Arnica Do... | No rating available | No rating available | Sicons | {"product_specification"=>[{"key"=>"Pet Type",... |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 19997 | 93e9d343837400ce0d7980874ece471c | 2015-12-01 10:15:43 +0000 | http://www.flipkart.com/elite-collection-mediu... | Elite Collection Medium Acrylic Sticker | ["Baby Care >> Baby & Kids Gifts >> Stickers >... | STIE7VAYDKQZEBSD | 1299.0 | 999.0 | ["http://img5a.flixcart.com/image/sticker/b/s/... | False | Buy Elite Collection Medium Acrylic Sticker fo... | No rating available | No rating available | Elite Collection | {"product_specification"=>[{"key"=>"Number of ... |
| 19998 | 669e79b8fa5d9ae020841c0c97d5e935 | 2015-12-01 10:15:43 +0000 | http://www.flipkart.com/elite-collection-mediu... | Elite Collection Medium Acrylic Sticker | ["Baby Care >> Baby & Kids Gifts >> Stickers >... | STIE8YSVEPPCZ42Y | 1499.0 | 1199.0 | ["http://img5a.flixcart.com/image/sticker/4/2/... | False | Buy Elite Collection Medium Acrylic Sticker fo... | No rating available | No rating available | Elite Collection | {"product_specification"=>[{"key"=>"Number of ... |
| 19999 | cb4fa87a874f715fff567f7b7b3be79c | 2015-12-01 10:15:43 +0000 | http://www.flipkart.com/elite-collection-mediu... | Elite Collection Medium Acrylic Sticker | ["Baby Care >> Baby & Kids Gifts >> Stickers >... | STIE88KN9ZDSGZKY | 1499.0 | 999.0 | ["http://img6a.flixcart.com/image/sticker/z/k/... | False | Buy Elite Collection Medium Acrylic Sticker fo... | No rating available | No rating available | Elite Collection | {"product_specification"=>[{"key"=>"Number of ... |
| 20000 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 20001 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
20002 rows × 15 columns
# Preview the first five rows.
df.head(n=5)
| uniq_id | crawl_timestamp | product_url | product_name | product_category_tree | pid | retail_price | discounted_price | image | is_FK_Advantage_product | description | product_rating | overall_rating | brand | product_specifications | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | c2d766ca982eca8304150849735ffef9 | 2016-03-25 22:59:23 +0000 | http://www.flipkart.com/alisha-solid-women-s-c... | Alisha Solid Women's Cycling Shorts | ["Clothing >> Women's Clothing >> Lingerie, Sl... | SRTEH2FF9KEDEFGF | 999.0 | 379.0 | ["http://img5a.flixcart.com/image/short/u/4/a/... | False | Key Features of Alisha Solid Women's Cycling S... | No rating available | No rating available | Alisha | {"product_specification"=>[{"key"=>"Number of ... |
| 1 | 7f7036a6d550aaa89d34c77bd39a5e48 | 2016-03-25 22:59:23 +0000 | http://www.flipkart.com/fabhomedecor-fabric-do... | FabHomeDecor Fabric Double Sofa Bed | ["Furniture >> Living Room Furniture >> Sofa B... | SBEEH3QGU7MFYJFY | 32157.0 | 22646.0 | ["http://img6a.flixcart.com/image/sofa-bed/j/f... | False | FabHomeDecor Fabric Double Sofa Bed (Finish Co... | No rating available | No rating available | FabHomeDecor | {"product_specification"=>[{"key"=>"Installati... |
| 2 | f449ec65dcbc041b6ae5e6a32717d01b | 2016-03-25 22:59:23 +0000 | http://www.flipkart.com/aw-bellies/p/itmeh4grg... | AW Bellies | ["Footwear >> Women's Footwear >> Ballerinas >... | SHOEH4GRSUBJGZXE | 999.0 | 499.0 | ["http://img5a.flixcart.com/image/shoe/7/z/z/r... | False | Key Features of AW Bellies Sandals Wedges Heel... | No rating available | No rating available | AW | {"product_specification"=>[{"key"=>"Ideal For"... |
| 3 | 0973b37acd0c664e3de26e97e5571454 | 2016-03-25 22:59:23 +0000 | http://www.flipkart.com/alisha-solid-women-s-c... | Alisha Solid Women's Cycling Shorts | ["Clothing >> Women's Clothing >> Lingerie, Sl... | SRTEH2F6HUZMQ6SJ | 699.0 | 267.0 | ["http://img5a.flixcart.com/image/short/6/2/h/... | False | Key Features of Alisha Solid Women's Cycling S... | No rating available | No rating available | Alisha | {"product_specification"=>[{"key"=>"Number of ... |
| 4 | bc940ea42ee6bef5ac7cea3fb5cfbee7 | 2016-03-25 22:59:23 +0000 | http://www.flipkart.com/sicons-all-purpose-arn... | Sicons All Purpose Arnica Dog Shampoo | ["Pet Supplies >> Grooming >> Skin & Coat Care... | PSOEH3ZYDMSYARJ5 | 220.0 | 210.0 | ["http://img5a.flixcart.com/image/pet-shampoo/... | False | Specifications of Sicons All Purpose Arnica Do... | No rating available | No rating available | Sicons | {"product_specification"=>[{"key"=>"Pet Type",... |
# Report the frame's dimensions as a (rows, columns) tuple.
df.shape
(20002, 15)
# Print each column's non-null count and dtype, plus overall memory usage.
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 20002 entries, 0 to 20001 Data columns (total 15 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 uniq_id 20000 non-null object 1 crawl_timestamp 20000 non-null object 2 product_url 20000 non-null object 3 product_name 20000 non-null object 4 product_category_tree 20000 non-null object 5 pid 20000 non-null object 6 retail_price 19922 non-null float64 7 discounted_price 19922 non-null float64 8 image 19997 non-null object 9 is_FK_Advantage_product 20000 non-null object 10 description 19998 non-null object 11 product_rating 20000 non-null object 12 overall_rating 20000 non-null object 13 brand 14136 non-null object 14 product_specifications 19986 non-null object dtypes: float64(2), object(13) memory usage: 2.3+ MB
# Count the missing values in every column.
df.isna().sum()
uniq_id 2 crawl_timestamp 2 product_url 2 product_name 2 product_category_tree 2 pid 2 retail_price 80 discounted_price 80 image 5 is_FK_Advantage_product 2 description 4 product_rating 2 overall_rating 2 brand 5866 product_specifications 16 dtype: int64
# Impute missing prices with the column median.
# The filled Series is assigned back to the frame instead of calling
# `fillna(..., inplace=True)` on `df[col]`: that chained in-place form acts on
# an intermediate object, triggers a pandas warning, and leaves `df` untouched
# once copy-on-write is enabled.
for price_col in ("retail_price", "discounted_price"):
    df[price_col] = df[price_col].fillna(df[price_col].median())
df.head()
| uniq_id | crawl_timestamp | product_url | product_name | product_category_tree | pid | retail_price | discounted_price | image | is_FK_Advantage_product | description | product_rating | overall_rating | brand | product_specifications | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | c2d766ca982eca8304150849735ffef9 | 2016-03-25 22:59:23 +0000 | http://www.flipkart.com/alisha-solid-women-s-c... | Alisha Solid Women's Cycling Shorts | ["Clothing >> Women's Clothing >> Lingerie, Sl... | SRTEH2FF9KEDEFGF | 999.0 | 379.0 | ["http://img5a.flixcart.com/image/short/u/4/a/... | False | Key Features of Alisha Solid Women's Cycling S... | No rating available | No rating available | Alisha | {"product_specification"=>[{"key"=>"Number of ... |
| 1 | 7f7036a6d550aaa89d34c77bd39a5e48 | 2016-03-25 22:59:23 +0000 | http://www.flipkart.com/fabhomedecor-fabric-do... | FabHomeDecor Fabric Double Sofa Bed | ["Furniture >> Living Room Furniture >> Sofa B... | SBEEH3QGU7MFYJFY | 32157.0 | 22646.0 | ["http://img6a.flixcart.com/image/sofa-bed/j/f... | False | FabHomeDecor Fabric Double Sofa Bed (Finish Co... | No rating available | No rating available | FabHomeDecor | {"product_specification"=>[{"key"=>"Installati... |
| 2 | f449ec65dcbc041b6ae5e6a32717d01b | 2016-03-25 22:59:23 +0000 | http://www.flipkart.com/aw-bellies/p/itmeh4grg... | AW Bellies | ["Footwear >> Women's Footwear >> Ballerinas >... | SHOEH4GRSUBJGZXE | 999.0 | 499.0 | ["http://img5a.flixcart.com/image/shoe/7/z/z/r... | False | Key Features of AW Bellies Sandals Wedges Heel... | No rating available | No rating available | AW | {"product_specification"=>[{"key"=>"Ideal For"... |
| 3 | 0973b37acd0c664e3de26e97e5571454 | 2016-03-25 22:59:23 +0000 | http://www.flipkart.com/alisha-solid-women-s-c... | Alisha Solid Women's Cycling Shorts | ["Clothing >> Women's Clothing >> Lingerie, Sl... | SRTEH2F6HUZMQ6SJ | 699.0 | 267.0 | ["http://img5a.flixcart.com/image/short/6/2/h/... | False | Key Features of Alisha Solid Women's Cycling S... | No rating available | No rating available | Alisha | {"product_specification"=>[{"key"=>"Number of ... |
| 4 | bc940ea42ee6bef5ac7cea3fb5cfbee7 | 2016-03-25 22:59:23 +0000 | http://www.flipkart.com/sicons-all-purpose-arn... | Sicons All Purpose Arnica Dog Shampoo | ["Pet Supplies >> Grooming >> Skin & Coat Care... | PSOEH3ZYDMSYARJ5 | 220.0 | 210.0 | ["http://img5a.flixcart.com/image/pet-shampoo/... | False | Specifications of Sicons All Purpose Arnica Do... | No rating available | No rating available | Sicons | {"product_specification"=>[{"key"=>"Pet Type",... |
# Discount as a percentage of the retail price:
# (retail - discounted) / retail * 100.
price_cut = df['retail_price'] - df['discounted_price']
df['discount_percentage'] = (price_cut / df['retail_price']) * 100

# Parse the crawl timestamp; values that cannot be parsed become NaT.
df['timestamp'] = pd.to_datetime(df['crawl_timestamp'], errors='coerce')

# Split the timestamp into its time-of-day and calendar-date parts,
# storing None wherever the timestamp is missing.
df['Time'] = df['timestamp'].apply(lambda ts: None if pd.isnull(ts) else ts.time())
df['date'] = df['timestamp'].apply(lambda ts: None if pd.isnull(ts) else ts.date())
df.head()
| uniq_id | crawl_timestamp | product_url | product_name | product_category_tree | pid | retail_price | discounted_price | image | is_FK_Advantage_product | description | product_rating | overall_rating | brand | product_specifications | discount_percentage | timestamp | Time | date | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | c2d766ca982eca8304150849735ffef9 | 2016-03-25 22:59:23 +0000 | http://www.flipkart.com/alisha-solid-women-s-c... | Alisha Solid Women's Cycling Shorts | ["Clothing >> Women's Clothing >> Lingerie, Sl... | SRTEH2FF9KEDEFGF | 999.0 | 379.0 | ["http://img5a.flixcart.com/image/short/u/4/a/... | False | Key Features of Alisha Solid Women's Cycling S... | No rating available | No rating available | Alisha | {"product_specification"=>[{"key"=>"Number of ... | 62.062062 | 2016-03-25 22:59:23+00:00 | 22:59:23 | 2016-03-25 |
| 1 | 7f7036a6d550aaa89d34c77bd39a5e48 | 2016-03-25 22:59:23 +0000 | http://www.flipkart.com/fabhomedecor-fabric-do... | FabHomeDecor Fabric Double Sofa Bed | ["Furniture >> Living Room Furniture >> Sofa B... | SBEEH3QGU7MFYJFY | 32157.0 | 22646.0 | ["http://img6a.flixcart.com/image/sofa-bed/j/f... | False | FabHomeDecor Fabric Double Sofa Bed (Finish Co... | No rating available | No rating available | FabHomeDecor | {"product_specification"=>[{"key"=>"Installati... | 29.576764 | 2016-03-25 22:59:23+00:00 | 22:59:23 | 2016-03-25 |
| 2 | f449ec65dcbc041b6ae5e6a32717d01b | 2016-03-25 22:59:23 +0000 | http://www.flipkart.com/aw-bellies/p/itmeh4grg... | AW Bellies | ["Footwear >> Women's Footwear >> Ballerinas >... | SHOEH4GRSUBJGZXE | 999.0 | 499.0 | ["http://img5a.flixcart.com/image/shoe/7/z/z/r... | False | Key Features of AW Bellies Sandals Wedges Heel... | No rating available | No rating available | AW | {"product_specification"=>[{"key"=>"Ideal For"... | 50.050050 | 2016-03-25 22:59:23+00:00 | 22:59:23 | 2016-03-25 |
| 3 | 0973b37acd0c664e3de26e97e5571454 | 2016-03-25 22:59:23 +0000 | http://www.flipkart.com/alisha-solid-women-s-c... | Alisha Solid Women's Cycling Shorts | ["Clothing >> Women's Clothing >> Lingerie, Sl... | SRTEH2F6HUZMQ6SJ | 699.0 | 267.0 | ["http://img5a.flixcart.com/image/short/6/2/h/... | False | Key Features of Alisha Solid Women's Cycling S... | No rating available | No rating available | Alisha | {"product_specification"=>[{"key"=>"Number of ... | 61.802575 | 2016-03-25 22:59:23+00:00 | 22:59:23 | 2016-03-25 |
| 4 | bc940ea42ee6bef5ac7cea3fb5cfbee7 | 2016-03-25 22:59:23 +0000 | http://www.flipkart.com/sicons-all-purpose-arn... | Sicons All Purpose Arnica Dog Shampoo | ["Pet Supplies >> Grooming >> Skin & Coat Care... | PSOEH3ZYDMSYARJ5 | 220.0 | 210.0 | ["http://img5a.flixcart.com/image/pet-shampoo/... | False | Specifications of Sicons All Purpose Arnica Do... | No rating available | No rating available | Sicons | {"product_specification"=>[{"key"=>"Pet Type",... | 4.545455 | 2016-03-25 22:59:23+00:00 | 22:59:23 | 2016-03-25 |
def _main_category(tree):
    """Return the top-level category of a raw ``product_category_tree`` value.

    The raw value looks like ``'["Clothing >> Women\'s Clothing >> ..."]'``.
    The text before the first ``>>`` is taken and the surrounding whitespace
    and list punctuation (``[``, ``]``, ``"``) are stripped, so every category
    comes out clean (``'Clothing'`` rather than ``'Clothing '``) and a
    single-level tree does not keep its closing ``"]``.

    Non-string inputs (e.g. NaN for missing rows) yield ``None``.
    """
    if not isinstance(tree, str):
        return None
    top_level = tree.split('>>')[0]
    return top_level.strip().strip('[]"').strip()


# Extract the main category from 'product_category_tree'.
df['main_category'] = df['product_category_tree'].apply(_main_category)
df.head()
| uniq_id | crawl_timestamp | product_url | product_name | product_category_tree | pid | retail_price | discounted_price | image | is_FK_Advantage_product | description | product_rating | overall_rating | brand | product_specifications | discount_percentage | timestamp | Time | date | main_category | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | c2d766ca982eca8304150849735ffef9 | 2016-03-25 22:59:23 +0000 | http://www.flipkart.com/alisha-solid-women-s-c... | Alisha Solid Women's Cycling Shorts | ["Clothing >> Women's Clothing >> Lingerie, Sl... | SRTEH2FF9KEDEFGF | 999.0 | 379.0 | ["http://img5a.flixcart.com/image/short/u/4/a/... | False | Key Features of Alisha Solid Women's Cycling S... | No rating available | No rating available | Alisha | {"product_specification"=>[{"key"=>"Number of ... | 62.062062 | 2016-03-25 22:59:23+00:00 | 22:59:23 | 2016-03-25 | Clothing |
| 1 | 7f7036a6d550aaa89d34c77bd39a5e48 | 2016-03-25 22:59:23 +0000 | http://www.flipkart.com/fabhomedecor-fabric-do... | FabHomeDecor Fabric Double Sofa Bed | ["Furniture >> Living Room Furniture >> Sofa B... | SBEEH3QGU7MFYJFY | 32157.0 | 22646.0 | ["http://img6a.flixcart.com/image/sofa-bed/j/f... | False | FabHomeDecor Fabric Double Sofa Bed (Finish Co... | No rating available | No rating available | FabHomeDecor | {"product_specification"=>[{"key"=>"Installati... | 29.576764 | 2016-03-25 22:59:23+00:00 | 22:59:23 | 2016-03-25 | Furniture |
| 2 | f449ec65dcbc041b6ae5e6a32717d01b | 2016-03-25 22:59:23 +0000 | http://www.flipkart.com/aw-bellies/p/itmeh4grg... | AW Bellies | ["Footwear >> Women's Footwear >> Ballerinas >... | SHOEH4GRSUBJGZXE | 999.0 | 499.0 | ["http://img5a.flixcart.com/image/shoe/7/z/z/r... | False | Key Features of AW Bellies Sandals Wedges Heel... | No rating available | No rating available | AW | {"product_specification"=>[{"key"=>"Ideal For"... | 50.050050 | 2016-03-25 22:59:23+00:00 | 22:59:23 | 2016-03-25 | Footwear |
| 3 | 0973b37acd0c664e3de26e97e5571454 | 2016-03-25 22:59:23 +0000 | http://www.flipkart.com/alisha-solid-women-s-c... | Alisha Solid Women's Cycling Shorts | ["Clothing >> Women's Clothing >> Lingerie, Sl... | SRTEH2F6HUZMQ6SJ | 699.0 | 267.0 | ["http://img5a.flixcart.com/image/short/6/2/h/... | False | Key Features of Alisha Solid Women's Cycling S... | No rating available | No rating available | Alisha | {"product_specification"=>[{"key"=>"Number of ... | 61.802575 | 2016-03-25 22:59:23+00:00 | 22:59:23 | 2016-03-25 | Clothing |
| 4 | bc940ea42ee6bef5ac7cea3fb5cfbee7 | 2016-03-25 22:59:23 +0000 | http://www.flipkart.com/sicons-all-purpose-arn... | Sicons All Purpose Arnica Dog Shampoo | ["Pet Supplies >> Grooming >> Skin & Coat Care... | PSOEH3ZYDMSYARJ5 | 220.0 | 210.0 | ["http://img5a.flixcart.com/image/pet-shampoo/... | False | Specifications of Sicons All Purpose Arnica Do... | No rating available | No rating available | Sicons | {"product_specification"=>[{"key"=>"Pet Type",... | 4.545455 | 2016-03-25 22:59:23+00:00 | 22:59:23 | 2016-03-25 | Pet Supplies |
# Display the derived main-category column.
df['main_category']
0 Clothing
1 Furniture
2 Footwear
3 Clothing
4 Pet Supplies
...
19997 Baby Care
19998 Baby Care
19999 Baby Care
20000 None
20001 None
Name: main_category, Length: 20002, dtype: object
# Tabulate the n most frequent main categories, moving the category label
# out of the index into an ordinary column.
n = 10
category_counts = df['main_category'].value_counts()
top_products = category_counts.head(n).to_frame().reset_index()
print("Before renaming:")
print(top_products)
Before renaming:
main_category count
0 Clothing 6198
1 Jewellery 3531
2 Footwear 1227
3 Mobiles & Accessories 1099
4 Automotive 1012
5 Home Decor & Festive Needs 929
6 Beauty and Personal Care 710
7 Home Furnishing 700
8 Kitchen & Dining 647
9 Computers 578
# Give the two columns presentation-friendly names (assigned by position).
top_products = top_products.set_axis(['Top_Products', 'Total_count'], axis='columns')
print("\nAfter renaming:")
print(top_products)
After renaming:
Top_Products Total_count
0 Clothing 6198
1 Jewellery 3531
2 Footwear 1227
3 Mobiles & Accessories 1099
4 Automotive 1012
5 Home Decor & Festive Needs 929
6 Beauty and Personal Care 710
7 Home Furnishing 700
8 Kitchen & Dining 647
9 Computers 578
# Display the brand column.
df['brand']
0 Alisha
1 FabHomeDecor
2 AW
3 Alisha
4 Sicons
...
19997 Elite Collection
19998 Elite Collection
19999 Elite Collection
20000 NaN
20001 NaN
Name: brand, Length: 20002, dtype: object
# Tabulate the n most frequent brands, moving the brand label out of the
# index into an ordinary column.
n = 10
brand_counts = df['brand'].value_counts()
top_brands = brand_counts.head(n).to_frame().reset_index()
print("Before renaming:")
print(top_brands)
Before renaming:
brand count
0 Allure Auto 469
1 Regular 313
2 Voylla 299
3 Slim 288
4 TheLostPuppy 229
5 Karatcraft 211
6 Black 167
7 White 155
8 DailyObjects 144
9 Speedwav 141
# Give the two columns presentation-friendly names (assigned by position).
top_brands = top_brands.set_axis(['Top Brands', 'Total count'], axis='columns')
print("\nAfter renaming:")
print(top_brands)
After renaming:
Top Brands Total count
0 Allure Auto 469
1 Regular 313
2 Voylla 299
3 Slim 288
4 TheLostPuppy 229
5 Karatcraft 211
6 Black 167
7 White 155
8 DailyObjects 144
9 Speedwav 141
from plotly.subplots import make_subplots

# Two side-by-side donut charts: top categories on the left, top brands on
# the right. Pie traces need 'domain'-type subplot cells.
donut_specs = [[{'type': 'domain'}, {'type': 'domain'}]]
fig_both = make_subplots(rows=1, cols=2, specs=donut_specs)

# The first slice of each pie is pulled out from the centre for emphasis.
product_pie = go.Pie(
    labels=top_products['Top_Products'],
    values=top_products['Total_count'],
    name="Top Products",
    pull=[0.3, 0, 0, 0],
)
brand_pie = go.Pie(
    labels=top_brands['Top Brands'],
    values=top_brands['Total count'],
    name="Total Brands",
    pull=[0.3, 0, 0, 0],
)
fig_both.add_trace(product_pie, 1, 1)
fig_both.add_trace(brand_pie, 1, 2)

# A non-zero hole turns each pie into a donut.
fig_both.update_traces(hole=.4, hoverinfo="label+percent+name")

# Captions placed in the centre of each donut.
center_labels = [
    dict(text='Product', x=0.18, y=0.5, font_size=20, showarrow=False),
    dict(text='Brand', x=0.82, y=0.5, font_size=20, showarrow=False),
]
fig_both.update_layout(
    title_text="Top products and brands distribution",
    annotations=center_labels,
)
# Keep only listings discounted by more than 90%, then discard every row that
# has a missing value in any column. The explicit copy makes the frame
# independent of `df`, so the column assignment below is unambiguous.
df_discount = df.query('discount_percentage > 90').dropna().copy()

# Normalise brand spellings so one brand is not averaged as several.
# The result is assigned back rather than using `replace(..., inplace=True)`
# on `df_discount["brand"]`: that chained in-place form acts on an
# intermediate object and is a no-op under pandas copy-on-write.
brand_spelling_fixes = {
    'FashBlush': 'Fash Blush',
    'Fashblush': 'Fash Blush',
}
df_discount["brand"] = df_discount["brand"].replace(brand_spelling_fixes)

# Average discount percentage per brand, highest first.
max_discount = (
    df_discount
    .groupby('brand')[['discount_percentage']]
    .mean()
    .sort_values(by='discount_percentage', ascending=False)
    .reset_index()
)
print(max_discount)
brand discount_percentage 0 Rajcrafts 96.533333 1 Bling 94.548458 2 Fash Blush 92.711714 3 Mydress Mystyle 91.991992 4 Soulful Threads 91.952663 5 Instella 91.719745 6 Bond Beatz 91.596639 7 Fashblush 91.132525 8 Black 90.681676 9 KazamaKraft 90.565618 10 Zaicus 90.143281 11 CUBA 90.045023 12 SDZ 90.045023 13 Gia 90.020004
# Bar chart of the average discount per brand, one colour per brand.
brand_axis_labels = {
    'discount_percentage': 'Average Discount Percentage',
    'brand': 'Brand',
}
fig = px.bar(
    max_discount,
    x='brand',
    y='discount_percentage',
    color='brand',
    color_discrete_sequence=px.colors.qualitative.Plotly,
    title='Average Discount Percentage by Brand',
    labels=brand_axis_labels,
)

# Explicit axis titles for readability.
fig.update_layout(xaxis_title='Brand', yaxis_title='Average Discount Percentage')

# Render the figure.
fig.show()
# Sum the discounted price per `uniq_id` and rank the totals, highest first.
# NOTE(review): the chart labels treat `uniq_id` as a customer id; it looks
# like a per-listing identifier in this dataset — confirm before reading the
# result as customer spending.
spend_per_id = df.groupby("uniq_id")[["discounted_price"]].sum()
df_customer = spend_per_id.sort_values(by=['discounted_price'], ascending=[False])

# Keep the 20 largest totals and turn 'uniq_id' back into a regular column
# so it can be used as the x-axis.
top_20_customers = df_customer.head(20).reset_index()

# Bar chart coloured by the summed price.
spending_labels = {'discounted_price': 'Total Spending', 'uniq_id': 'Customer ID'}
fig = px.bar(
    top_20_customers,
    x='uniq_id',
    y='discounted_price',
    color='discounted_price',
    color_continuous_scale=px.colors.diverging.Portland,
    title='Top 20 Customers by Spending',
    labels=spending_labels,
)

# Axis titles, with the long id tick labels tilted so they stay legible.
fig.update_layout(
    xaxis_title='Customer ID',
    yaxis_title='Total Spending',
    xaxis_tickangle=-45,
)

# Render the figure.
fig.show()
# Restrict to products whose rating is exactly the string '5'.
is_five_star = df['product_rating'] == '5'
rating_5 = df[is_five_star]

# Frequency of each main category and each brand among 5-star products.
top_product_type = rating_5['main_category'].value_counts()
top_brand_type = rating_5['brand'].value_counts()

# Five most common categories, as a two-column table.
df_top_product = top_product_type.head(5).reset_index()
df_top_product.columns = ['top_prod', 'count']

# Five most common brands, as a two-column table.
df_top_brand = top_brand_type.head(5).reset_index()
df_top_brand.columns = ['top_brands', 'count']

# Print both tables, separated by a rule.
print(df_top_product)
print("-"*50)
print(df_top_brand)
top_prod count 0 Clothing 232 1 Jewellery 70 2 Footwear 47 3 Watches 47 4 Kitchen & Dining 37 -------------------------------------------------- top_brands count 0 Regular 21 1 Slim 13 2 Black 9 3 Bosch 9 4 JDX 8
# Place the category table and the brand table side by side, aligned on
# their row index.
df_product_brand_rate5 = pd.concat((df_top_product, df_top_brand), axis='columns')
# Work on a filtered view of the rated products instead of dropping the
# unrated rows from `df` in place. Dropping in place would silently shrink the
# shared frame for every later cell, including the funnel's
# "Total Products" count.
rated = df[df['product_rating'] != 'No rating available']

# Number of products per rating value (missing ratings are not counted).
ratings = rated['product_rating'].value_counts().reset_index()
ratings.columns = ['Ratings', 'Counts']

# Ratings are stored as strings; convert them so they sort numerically.
ratings['Ratings'] = ratings['Ratings'].astype(float)
ratings = ratings.sort_values(by='Ratings', ascending=False)

# Dot plot of how many products received each rating.
figdot2 = go.Figure()
figdot2.add_trace(go.Scatter(
    x=ratings['Ratings'],
    y=ratings['Counts'],
    marker=dict(color="crimson", size=12),
    mode="markers",
    name="ratings",
))
figdot2.update_layout(
    title="Ratings vs Count",
    xaxis_title="Ratings",
    yaxis_title="Count",
)
# Draw a black frame around the plotting area.
figdot2.update_xaxes(showline=True, linewidth=1, linecolor='black', mirror=True)
figdot2.update_yaxes(showline=True, linewidth=1, linecolor='black', mirror=True)
figdot2.show()
# Mean retail and discounted price for each crawl date, with 'date' as a
# regular column.
price_columns = ["retail_price", "discounted_price"]
df_date_price = df.groupby("date")[price_columns].mean().reset_index()

# Overlaid filled-area traces: retail price first, discounted price on top.
fig_area2 = go.Figure()
area_traces = (
    ('retail_price', 'Retail Price', 'crimson'),
    ('discounted_price', 'Discount Price', 'darkslategray'),
)
for price_col, trace_name, line_color in area_traces:
    fig_area2.add_trace(go.Scatter(
        x=df_date_price['date'],
        y=df_date_price[price_col],
        fill='tozeroy',
        name=trace_name,
        line=dict(width=0.5, color=line_color),
    ))

fig_area2.update_layout(
    xaxis_title="Dates",
    yaxis_title="Price (in 1000s)",
    plot_bgcolor='white',
)
# Draw a black frame around the plotting area.
fig_area2.update_xaxes(showline=True, linewidth=1, linecolor='black', mirror=True)
fig_area2.update_yaxes(showline=True, linewidth=1, linecolor='black', mirror=True)
fig_area2.show()
# Sort the whole frame by time of day and take both axes from the sorted
# frame. Sorting only the x Series would reorder the times while leaving the
# URLs in their original order, so each plotted point would pair a time with
# a URL from a different row.
by_time = df.sort_values('Time')
scat2 = px.scatter(x=by_time['Time'], y=by_time['product_url'])

# NOTE(review): the y-axis is titled "No. of Clicks" but plots the raw
# `product_url` values; no click count is computed here — confirm the
# intended metric.
scat2.update_layout(
    title='No. of clicks vs time',
    xaxis_title='Time',
    yaxis_title='No. of Clicks',
)

# Draw a black frame around the plotting area.
scat2.update_xaxes(showline=True, linewidth=1, linecolor='black', mirror=True)
scat2.update_yaxes(showline=True, linewidth=1, linecolor='black', mirror=True)

# The y tick labels are long URLs, so hide them.
scat2.update_yaxes(showticklabels=False)

# Render the figure.
scat2.show()
# Stage masks. Rows without a product id are not products, and a missing
# rating is not a rating: a plain `!= 'No rating available'` comparison is
# True for NaN, so such rows would otherwise be counted as rated.
has_product = df['pid'].notna()
has_rating = df['product_rating'].notna() & (df['product_rating'] != 'No rating available')
is_five_star = df['product_rating'] == '5'

# Stage sizes, narrowing from all products to rated to 5-star.
# NOTE(review): if an earlier cell has already removed the unrated rows from
# `df`, the first two stages will coincide.
total_prod = int(has_product.sum())
total_ratings = int((has_product & has_rating).sum())
top_ratings = int((has_product & is_five_star).sum())

# One row per funnel stage, widest first.
df_funnel_1 = pd.DataFrame({
    'number': [total_prod, total_ratings, top_ratings],
    'stage': ["Total Products", "Products with Ratings", "Products with 5-Star Rating"]
})

# Build and render the funnel chart.
funnel_1_fig = px.funnel(df_funnel_1, x='number', y='stage')
funnel_1_fig.show()